results_dir <- "/albona/nobackup2/yingxinl/melanoma_github/results/final"
figures_dir <- "/albona/nobackup2/yingxinl/melanoma_github/figures/final"
sourcedata_dir <- "/albona/nobackup2/yingxinl/melanoma_github/source_data/final"
dir.create(results_dir, recursive = TRUE)
dir.create(figures_dir, recursive = TRUE)
dir.create(sourcedata_dir, recursive = TRUE)
library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon) # Yingxin's personal package
library(pheatmap)
library(reshape2)
library(gridExtra)
library(RColorBrewer)
library(UpSetR)
library(scattermore)
library(scater)
library(scran)
library(ggridges)
library(rcartocolor)
library(Rtsne)
library(ggalluvial)
library(ggrepel)
library(BiocParallel)
library(BiocSingular)
library(BiocNeighbors)
library(openxlsx)
ggplot2::theme_set(theme_bw() + theme_yx() +
theme(axis.text.y = element_text(color = "black"),
axis.text.x = element_text(color = "black"),
axis.ticks.x = element_line(color = "black"),
axis.ticks.y = element_line(color = "black")))
sce_melanoma <- readRDS(file.path(sourcedata_dir, "sce_melanoma_raw.rds"))
nUMI <- colSums(counts(sce_melanoma))
summary(nUMI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 223 2251 9524 13574 20104 188737
nGene <- colSums(counts(sce_melanoma) != 0)
summary(nGene)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 57 981 3019 3052 4747 10662
mt_genes <- grep("MT-", rowData(sce_melanoma)[, "Symbol"])
MT_prop <- colSums(counts(sce_melanoma)[mt_genes, ])/nUMI
sce_melanoma$nUMI <- nUMI
sce_melanoma$nGene <- nGene
sce_melanoma$MT_prop <- MT_prop
df_toPlot <- moon::makeMoonDF(sce_melanoma)
g1 <- ggplot(df_toPlot, aes(x = nUMI)) +
geom_histogram(bins = 50) +
scale_color_viridis_d() +
theme(aspect.ratio = 0.6) +
scale_x_log10() +
geom_vline(xintercept = 500, color = "red")+
geom_vline(xintercept = 80000, color = "red")
g2 <- ggplot(df_toPlot, aes(x = nGene)) +
geom_histogram(bins = 50) +
scale_color_viridis_d() +
theme(aspect.ratio = 0.6) +
scale_x_log10() +
geom_vline(xintercept = 400, color = "red")
g3 <- ggplot(df_toPlot, aes(x = MT_prop)) +
geom_histogram(bins = 50) +
scale_color_viridis_d() +
theme(aspect.ratio = 0.6) +
geom_vline(xintercept = 0.2, color = "red") +
xlab("MT%")
ggarrange(g1, g2, g3, ncol = 1, nrow = 3, align = "hv")
ggsave(file.path(figures_dir, "qc.pdf"), width = 6, height = 10)
saveRDS(df_toPlot[, c("nUMI", "nGene", "MT_prop")], file = file.path(sourcedata_dir, "qc.rds"))
keep <- nUMI < 80000 & nUMI > 500 & nGene > 400 & MT_prop < 0.2
table(keep)
## keep
## FALSE TRUE
## 24233 168599
table(keep, sce_melanoma$Sample)
##
## keep SCC16_0069_B230216_DMSO SCC16_0069_B230216_DT SCC16_0500_B101017_DMSO
## FALSE 596 835 620
## TRUE 7253 7480 13036
##
## keep SCC16_0500_B101017_DT SCC20_0352_B291020_DMSO SCC20_0352_B291020_DT
## FALSE 1131 703 632
## TRUE 7514 15321 11986
##
## keep SMU09_0157_B110419_DMSO SMU09_0157_B110419_DT SMU17_0075_B070921_DMSO
## FALSE 2630 2265 1075
## TRUE 7791 7560 8286
##
## keep SMU17_0075_B070921_DT SMU17_0132_B180418_DMSO SMU17_0132_B180418_DT
## FALSE 1192 752 1737
## TRUE 5964 6277 6424
##
## keep SMU17_0209_B130617_DMSO SMU17_0209_B130617_DT SMU18_0017_B110518_DMSO
## FALSE 1216 2223 1412
## TRUE 8427 6129 7218
##
## keep SMU18_0017_B110518_DT SMU18_0283_B090119_DMSO SMU18_0283_B090119_DT
## FALSE 990 1098 553
## TRUE 7483 9551 8099
##
## keep SMU19_0306_B290620_DMSO SMU19_0306_B290620_DT
## FALSE 1469 1104
## TRUE 9119 7681
Keel cells with nUMI < 80000 & nUMI > 500 & nGene > 400 & MT_prop < 0.2
sce_melanoma <- sce_melanoma[, keep]
sce_melanoma <- sce_melanoma[rowSums(counts(sce_melanoma)) != 0, ]
sce_melanoma
## class: SingleCellExperiment
## dim: 30986 168599
## metadata(1): Samples
## assays(1): counts
## rownames(30986): ENSG00000237613 ENSG00000186092 ... ENSG00000277475
## ENSG00000268674
## rowData names(3): ID Symbol Type
## colnames(168599): AAACCCAAGAAACTGT-1 AAACCCAAGAATCGAT-1 ...
## TTTGTTGGTTGACGGA-20 TTTGTTGTCGGTGTTA-20
## colData names(15): Sample Barcode ... nGene MT_prop
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
sce_melanoma <- logNormCounts(sce_melanoma)
refine_prediction <- readRDS("results/scClassify_selfTraining_prediction.rds")
refine_prediction <- refine_prediction[, 2]
label_len <- unlist(lapply(strsplit(refine_prediction, "_"), length))
refine_prediction[label_len > 1] <- "intermediate"
sce_melanoma$scClassify_celltype <- refine_prediction
celltype_color <- RColorBrewer::brewer.pal(12, "Paired")[c(1:10)]
celltype_color <- c(celltype_color, "grey70")
names(celltype_color) <- names(table(refine_prediction))
celltype_color["intermediate"] <- "grey90"
df_toPlot <- moon::makeMoonDF(sce_melanoma)
sce_melanoma$Patient_publish <- paste("Patient", as.numeric(factor(sce_melanoma$Patient)), sep = "#")
sce_melanoma$Sample_publish <- paste(sce_melanoma$Patient_publish,
sce_melanoma$Condition, sep = " ")
meta_data_publish <- colData(sce_melanoma)[, c("Patient_publish",
"Sample_publish",
"Patient",
"Sample",
"Sample.type",
"Condition")]
meta_data_publish <- unique(meta_data_publish)
meta_data_publish <- data.frame(meta_data_publish)
write.csv(meta_data_publish, file = "source_data/final/meta_remap_sample.csv")
rownames(sce_melanoma) <- rowData(sce_melanoma)$Symbol
hvg_fit <- scran::modelGeneVar(sce_melanoma, block = sce_melanoma$Sample)
hvg <- scran::getTopHVGs(hvg_fit, n = 2000)
use_bpparam <- MulticoreParam(workers = 10)
BiocParallel::bpprogressbar(use_bpparam) <- TRUE
filter <- grepl("RPL|RPS|^MT-|ERCC|MTMR|MTND", rownames(sce_melanoma))
gene_corMat <- qlcMatrix::cosSparse(t((logcounts(sce_melanoma)[filter, ])),
t((logcounts(sce_melanoma)[!filter, ])))
gene_corMat_max <- apply(gene_corMat, 2, max, na.rm = TRUE)
exclude_genes <- c(rownames(sce_melanoma)[filter],
names(gene_corMat_max)[gene_corMat_max > 0.8])
filter <- rownames(sce_melanoma) %in% exclude_genes
hvg <- setdiff(hvg, exclude_genes)
set.seed(2022)
sce_melanoma <- runPCA(sce_melanoma, ncomponents = 20, subset_row = hvg, BPPARAM = use_bpparam, BSPARAM = RandomParam())
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
##
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
##
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
##
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
##
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
##
##
|
| | 0%
|
|======== | 11%
|
|================ | 22%
|
|======================= | 33%
|
|=============================== | 44%
|
|======================================= | 56%
|
|=============================================== | 67%
|
|====================================================== | 78%
|
|============================================================== | 89%
|
|======================================================================| 100%
set.seed(2022)
sce_melanoma <- runUMAP(sce_melanoma, dimred = "PCA", min_dist = 0.3, verbose = TRUE)
df_toPlot <- moon::makeMoonDF(sce_melanoma)
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = scClassify_celltype)) +
geom_scattermore() +
scale_color_manual(values = celltype_color) +
theme(aspect.ratio = 1) +
labs(color = "")
ggsave(file.path(figures_dir, "umap_celltype.pdf"), width = 8, height = 6)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "scClassify_celltype")],
file = file.path(sourcedata_dir, "umap_celltype.rds"))
ggplot(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ],
aes(x = factor(Sample, levels = rev(unique(df_toPlot$Sample2))),
fill = scClassify_celltype)) +
geom_bar(position = "fill") +
scale_fill_manual(values = celltype_color) +
theme(aspect.ratio = 0.4) +
labs(fill = "") +
coord_flip() +
facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
xlab("") +
ylab("Cell type proportion")
ggsave(file.path(figures_dir, "celltype_proportion.pdf"), width = 8, height = 4)
saveRDS(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"),
c("UMAP1", "UMAP2", "scClassify_celltype", "Sample", "Sample.type")],
file = file.path(sourcedata_dir, "celltype_proportion.rds"))
ggplot(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ],
aes(x = factor(Sample_publish, levels = rev(sort(unique(df_toPlot$Sample_publish)))),
fill = scClassify_celltype)) +
geom_bar(position = "fill") +
scale_fill_manual(values = celltype_color) +
theme(aspect.ratio = 0.4) +
labs(fill = "") +
coord_flip() +
facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
xlab("") +
ylab("Cell type proportion")
ggsave(file.path(figures_dir, "celltype_proportion_publish.pdf"), width = 8, height = 4)
saveRDS(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"),
c("UMAP1", "UMAP2", "scClassify_celltype", "Sample_publish", "Sample.type")],
file = file.path(sourcedata_dir, "celltype_proportion_publish.rds"))
celltype_prop <- table(df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ]$scClassify_celltype,
df_toPlot[!df_toPlot$scClassify_celltype %in% c("intermediate", "unassigned"), ]$Sample)
celltype_prop <- apply(celltype_prop, 2, function(x) x/sum(x))
celltype_prop <- melt(celltype_prop)
colnames(celltype_prop) <- c("Celltype", "Sample", "Prop")
sample_meta <- unique(df_toPlot[, c("Sample", "Sample.type", "Condition", "Patient",
"Sample_publish", "Patient_publish")])
celltype_prop <- merge(celltype_prop, sample_meta, by = "Sample")
ggplot(celltype_prop, aes(x = Condition, y = Prop * 100, color = Sample.type)) +
geom_line(aes(group = Patient), color = "black") +
geom_point() +
facet_wrap(~Celltype, nrow = 1, scale = "free") +
scale_color_brewer(palette = "Dark2") +
theme(aspect.ratio = 1, legend.position = "bottom") +
ylab("Cell type proportion")
ggsave(file.path(figures_dir, "celltype_proportion_dots.pdf"), width = 13, height = 4)
saveRDS(celltype_prop,
file = file.path(sourcedata_dir, "celltype_proportion_dots.rds"))
ggplot(celltype_prop, aes(x = Condition, y = Prop * 100, color = Condition)) +
geom_line(aes(group = Patient), color = "black") +
geom_point() +
facet_grid(Celltype~Sample.type, scale = "free") +
scale_color_brewer(palette = "Set1") +
theme(aspect.ratio = 1, legend.position = "bottom") +
ylab("Cell type proportion")
ggsave(file.path(figures_dir, "celltype_proportion_dots_v2.pdf"), width = 8, height = 13)
ggplot(celltype_prop, aes(x = Sample.type, y = Prop * 100, color = Condition)) +
geom_boxplot() +
facet_wrap(~Celltype, nrow = 1, scale = "free") +
scale_color_brewer(palette = "Set1") +
theme(aspect.ratio = 1, legend.position = "bottom") +
ylab("Cell type proportion")
ggsave(file.path(figures_dir, "celltype_proportion_boxplot.pdf"), width = 13, height = 4)
lmer_res <- list()
for (i in levels(celltype_prop$Celltype)) {
df_subset <- celltype_prop[celltype_prop$Celltype == i, ]
lmer_res[[i]] <- lmerTest::lmer(Prop ~ Sample.type * Condition + (1|Patient), data = df_subset)
}
lapply(lmer_res, function(x) round(summary(x)$coefficients, 3))
## $`B Cells`
## Estimate Std. Error df t value
## (Intercept) 0.038 0.039 8.052 0.981
## Sample.typeTreatment naïve 0.046 0.055 8.052 0.848
## ConditionDT 0.002 0.004 8.000 0.398
## Sample.typeTreatment naïve:ConditionDT -0.006 0.006 8.000 -0.972
## Pr(>|t|)
## (Intercept) 0.355
## Sample.typeTreatment naïve 0.421
## ConditionDT 0.701
## Sample.typeTreatment naïve:ConditionDT 0.359
##
## $`Endothelial Cells`
## Estimate Std. Error df t value
## (Intercept) 0 0 12.62 1.479
## Sample.typeTreatment naïve 0 0 12.62 -0.461
## ConditionDT 0 0 8.00 0.602
## Sample.typeTreatment naïve:ConditionDT 0 0 8.00 -0.152
## Pr(>|t|)
## (Intercept) 0.164
## Sample.typeTreatment naïve 0.653
## ConditionDT 0.564
## Sample.typeTreatment naïve:ConditionDT 0.883
##
## $Fibroblasts
## Estimate Std. Error df t value
## (Intercept) 0.035 0.014 10.23 2.545
## Sample.typeTreatment naïve -0.031 0.019 10.23 -1.604
## ConditionDT -0.009 0.010 8.00 -0.949
## Sample.typeTreatment naïve:ConditionDT 0.010 0.014 8.00 0.739
## Pr(>|t|)
## (Intercept) 0.029
## Sample.typeTreatment naïve 0.139
## ConditionDT 0.370
## Sample.typeTreatment naïve:ConditionDT 0.481
##
## $Macrophages
## Estimate Std. Error df t value
## (Intercept) 0.046 0.016 9.846 2.935
## Sample.typeTreatment naïve -0.024 0.022 9.846 -1.084
## ConditionDT -0.005 0.010 8.000 -0.454
## Sample.typeTreatment naïve:ConditionDT 0.004 0.014 8.000 0.249
## Pr(>|t|)
## (Intercept) 0.015
## Sample.typeTreatment naïve 0.304
## ConditionDT 0.662
## Sample.typeTreatment naïve:ConditionDT 0.809
##
## $Monocyte
## Estimate Std. Error df t value
## (Intercept) 0.022 0.014 8.063 1.587
## Sample.typeTreatment naïve -0.019 0.020 8.063 -0.957
## ConditionDT -0.002 0.002 8.000 -0.989
## Sample.typeTreatment naïve:ConditionDT 0.001 0.002 8.000 0.306
## Pr(>|t|)
## (Intercept) 0.151
## Sample.typeTreatment naïve 0.366
## ConditionDT 0.352
## Sample.typeTreatment naïve:ConditionDT 0.767
##
## $`NK Cells`
## Estimate Std. Error df t value
## (Intercept) 0.009 0.003 8.217 2.802
## Sample.typeTreatment naïve -0.005 0.005 8.217 -1.124
## ConditionDT 0.002 0.001 8.000 2.681
## Sample.typeTreatment naïve:ConditionDT -0.001 0.001 8.000 -0.826
## Pr(>|t|)
## (Intercept) 0.023
## Sample.typeTreatment naïve 0.293
## ConditionDT 0.028
## Sample.typeTreatment naïve:ConditionDT 0.432
##
## $`Plasma Cells`
## Estimate Std. Error df t value
## (Intercept) 0.001 0.001 9.231 1.207
## Sample.typeTreatment naïve 0.001 0.001 9.231 1.322
## ConditionDT 0.000 0.000 8.000 -0.257
## Sample.typeTreatment naïve:ConditionDT -0.001 0.001 8.000 -1.181
## Pr(>|t|)
## (Intercept) 0.257
## Sample.typeTreatment naïve 0.218
## ConditionDT 0.804
## Sample.typeTreatment naïve:ConditionDT 0.272
##
## $`T cells`
## Estimate Std. Error df t value
## (Intercept) 0.151 0.058 8.866 2.609
## Sample.typeTreatment naïve 0.041 0.082 8.866 0.507
## ConditionDT 0.060 0.026 8.000 2.293
## Sample.typeTreatment naïve:ConditionDT -0.047 0.037 8.000 -1.257
## Pr(>|t|)
## (Intercept) 0.029
## Sample.typeTreatment naïve 0.625
## ConditionDT 0.051
## Sample.typeTreatment naïve:ConditionDT 0.244
##
## $`Tumor cells`
## Estimate Std. Error df t value
## (Intercept) 0.698 0.091 8.844 7.641
## Sample.typeTreatment naïve -0.010 0.129 8.844 -0.078
## ConditionDT -0.048 0.041 8.000 -1.183
## Sample.typeTreatment naïve:ConditionDT 0.040 0.058 8.000 0.689
## Pr(>|t|)
## (Intercept) 0.000
## Sample.typeTreatment naïve 0.940
## ConditionDT 0.271
## Sample.typeTreatment naïve:ConditionDT 0.510
g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Sample_publish)) +
geom_scattermore() +
scale_color_tableau(palette = "Tableau 20") +
theme(aspect.ratio = 1) +
labs(color = "")
g1 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Patient_publish)) +
geom_scattermore() +
scale_color_tableau(palette = "Tableau 10") +
theme(aspect.ratio = 1) +
labs(color = "")
g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Condition)) +
geom_scattermore() +
scale_color_brewer(palette = "Set1") +
theme(aspect.ratio = 1) +
labs(color = "")
g4 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = Sample.type)) +
geom_scattermore() +
scale_color_brewer(palette = "Dark2") +
theme(aspect.ratio = 1) +
labs(color = "")
ggarrange(g3, g1, g2, g4, ncol = 2, nrow = 2, align = "hv")
ggsave(file.path(figures_dir, "umap_supp.pdf"), width = 16, height = 12)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "Sample", "Patient", "Condition", "Sample.type")],
file = file.path(sourcedata_dir, "umap_supp.rds"))
flow_prop <- readxl::read_xls("data/Combined Flow - Copy.xls")
flow_prop <- flow_prop[-1, ]
flow_prop <- data.frame(flow_prop)
flow_prop <- flow_prop[-11, ]
flow_prop$Sample <- flow_prop[, 1]
flow_prop$Sample[flow_prop$Sample == "SCC16_0069 B23_02_16 M3_DMSO_009.fcs"] <- "SCC16_0069_B230216_DMSO"
flow_prop$Sample[flow_prop$Sample == "SCC16_0069 B23_02_16 M3_DT_010.fcs"] <- "SCC16_0069_B230216_DT"
flow_prop$Sample[flow_prop$Sample == "SCC20_0352 B29_10_20 Left Axillary_DMSO_005.fcs"] <- "SCC20_0352_B291020_DMSO"
flow_prop$Sample[flow_prop$Sample == "SCC20_0352 B29_10_20 Left Axillary_DT_006.fcs"] <- "SCC20_0352_B291020_DT"
flow_prop$Sample[flow_prop$Sample == "SMU09_0157 B11_04_2019 Chest wall_DMSO_023.fcs"] <- "SMU09_0157_B110419_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU09_0157 B11_04_2019 Chest wall_DT_024.fcs"] <- "SMU09_0157_B110419_DT"
flow_prop$Sample[flow_prop$Sample == "SMU17-0209 B13_06_2017 Small Bowel_DMSO_005.fcs"] <- "SMU17_0209_B130617_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU17-0209 B13_06_2017 Small Bowel_DT_006.fcs"] <- "SMU17_0209_B130617_DT"
flow_prop$Sample[flow_prop$Sample == "SMU18_0283 B09_01_2019 LI LN_Full DMSO_007.fcs"] <- "SMU18_0283_B090119_DMSO"
flow_prop$Sample[flow_prop$Sample == "SMU18_0283 B09_01_2019 LI LN_Full DT_008.fcs"] <- "SMU18_0283_B090119_DT"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0500 Control_016.fcs"] <- "SCC16_0500_B101017_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0500 DT_017.fcs"] <- "SCC16_0500_B101017_DT"
flow_prop$Sample[flow_prop$Sample == "Treatment 0306_Full stain Control_016.fcs"] <- "SMU19_0306_B290620_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment 0306_Full stain DT_017.fcs"] <- "SMU19_0306_B290620_DT"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0132 Control_014.fcs"] <- "SMU17_0132_B180418_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0132 DT_015.fcs"] <- "SMU17_0132_B180418_DT"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0017 Control_016.fcs"] <- "SMU18_0017_B110518_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment TD_0017 DT_017.fcs"] <- "SMU18_0017_B110518_DT"
flow_prop$Sample[flow_prop$Sample == "Treatment 0075_Distal Control_014.fcs"] <- "SMU17_0075_B070921_DMSO"
flow_prop$Sample[flow_prop$Sample == "Treatment 0075_Distal DT_015.fcs"] <- "SMU17_0075_B070921_DT"
rownames(flow_prop) <- flow_prop$Sample
t_flow <- flow_prop[, "Total.T.cells...Live.dead"]
t_flow <- as.numeric(t_flow)
names(t_flow) <- rownames(flow_prop)
t_celltype_prop <- celltype_prop[celltype_prop$Celltype == "T cells", ]
plot(t_celltype_prop$Prop, t_flow[as.character(t_celltype_prop$Sample)])
df <- cbind(t_celltype_prop, flow = t_flow[as.character(t_celltype_prop$Sample)])
g_T <- ggplot(df, aes(x = Prop * 100, y = flow)) +
geom_point(aes(color = Sample_publish), size = 3, alpha = 1) +
theme(aspect.ratio = 1) +
xlab("% of T cells in single-cell") +
ylab("% of T cells in flow cyto") +
scale_color_tableau(palette = "Tableau 20") +
geom_text(aes(x = 10, y = 30, label = paste("cor =", round(cor(df$Prop * 100, df$flow), 2)))) +
labs(color = "")
g_T
cor.test(df$Prop * 100, df$flow)
##
## Pearson's product-moment correlation
##
## data: df$Prop * 100 and df$flow
## t = 4.5041, df = 18, p-value = 0.0002745
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4210162 0.8852757
## sample estimates:
## cor
## 0.7279195
ggsave(file.path(figures_dir, "T_cells_celltype_proportion_comparison.pdf"), width = 5, height = 4)
saveRDS(df,
file = file.path(sourcedata_dir, "T_cells_celltype_proportion.rds"))
tumor_flow <- flow_prop[, "X..SOX10..MLM...Live.Dead"]
tumor_flow <- gsub(" %", "", tumor_flow)
tumor_flow <- as.numeric(tumor_flow)
names(tumor_flow) <- rownames(flow_prop)
tumor_celltype_prop <- celltype_prop[celltype_prop$Celltype == "Tumor cells", ]
df <- cbind(tumor_celltype_prop, flow = tumor_flow[as.character(tumor_celltype_prop$Sample)])
g_tumor <- ggplot(df, aes(x = Prop * 100, y = flow)) +
geom_point(aes(color = Sample_publish), size = 3, alpha = 1) +
theme(aspect.ratio = 1) +
xlab("% of Tumor cells in single-cell") +
ylab("% of Tumor cells in flow cyto") +
scale_color_tableau(palette = "Tableau 20") +
geom_text(aes(x = 50, y = 90, label = paste("cor =", round(cor(df$Prop * 100, df$flow), 2)))) +
labs(color = "")
g_tumor
cor.test(df$Prop * 100, df$flow)
##
## Pearson's product-moment correlation
##
## data: df$Prop * 100 and df$flow
## t = 4.1378, df = 18, p-value = 0.000618
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3700097 0.8714695
## sample estimates:
## cor
## 0.698208
ggsave(file.path(figures_dir, "Tumor_cells_celltype_proportion_comparison.pdf"), width = 5, height = 4)
saveRDS(df,
file = file.path(sourcedata_dir, "Tumor_cells_celltype_proportion.rds"))
legend <- get_legend(
# create some space to the left of the legend
g_T + theme(legend.position = "bottom", legend.box.margin = margin(0, 0, 0, 12))
)
library(cowplot)
plot_grid(plot_grid(g_T + theme(legend.position = "none"),
g_tumor + theme(legend.position = "none"),
labels = c('A', 'B'), align = "hv"),
legend, ncol = 1, rel_heights = c(1, .1))
ggsave(file.path(figures_dir, "celltype_proportion_comparison.pdf"), width = 8, height = 6)
g <- lapply(c("CD3E", "MS4A1", "CD68", "CD14",
"FAP",
"SOX10", "AXL", "MITF", "NGFR"),
function(x) {
# o <- order(logcounts(sce_melanoma)[x, ])
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2,
color = logcounts(sce_melanoma)[x, ])) +
geom_scattermore() +
theme(aspect.ratio = 1) +
scale_color_gradientn(colors = colorRampPalette(c("grey90", brewer.pal(n = 7, name = "Reds")))(100)) +
labs(color = "", title = x)
})
ggarrange(plotlist = g, ncol = 3, nrow = 3, align = "hv")
ggsave(file.path(figures_dir, "celltype_marker_example.pdf"), width = 12, height = 10)
library(cellyx)
subset_idx <- !sce_melanoma$scClassify_celltype %in% c("intermediate", "unassigned", "Tumor cells")
limma_res <- doLimma(logcounts(sce_melanoma)[, subset_idx], sce_melanoma$scClassify_celltype[subset_idx])
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
limma_res <- lapply(limma_res, function(x) x[order(x$logFC, decreasing = TRUE), ])
wb <- createWorkbook()
lapply(seq_along(limma_res), function(i){
addWorksheet(wb = wb, sheetName = names(limma_res)[i])
writeData(wb, sheet = names(limma_res)[i],
data.frame(limma_res[[i]]))
})
## [[1]]
## [1] 0
##
## [[2]]
## [1] 0
##
## [[3]]
## [1] 0
##
## [[4]]
## [1] 0
##
## [[5]]
## [1] 0
##
## [[6]]
## [1] 0
##
## [[7]]
## [1] 0
##
## [[8]]
## [1] 0
saveWorkbook(wb, file.path(sourcedata_dir, paste0("immune_celltype_marker", ".xlsx")), overwrite = TRUE)
markers <- lapply(limma_res, function(x) rownames(x)[1:10])
aggExprs <- lapply(names(limma_res), function(x) {
idx <- sce_melanoma$scClassify_celltype %in% x
rowMeans(logcounts(sce_melanoma)[unlist(markers), idx])
})
aggExprs <- do.call(cbind, aggExprs)
colnames(aggExprs) <- names(limma_res)
rdbu <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
pheatmap(t(aggExprs), cluster_cols = FALSE, cluster_rows = FALSE,
scale = "column", color = rdbu,
border_color = NA,
file = file.path(figures_dir, "heatmap_immune_celltype_marker_aggExprs.pdf"),
height = 3, width = 10)
saveRDS(aggExprs, file.path(sourcedata_dir, paste0("immune_celltype_marker_aggExprs", ".rds")))
numbat_classify <- readRDS("/albona/nobackup2/yingxinl/melanoma/results/numbat_tumour_classification.rds")
table(numbat_classify, sce_melanoma$scClassify_celltype)
##
## numbat_classify B Cells Endothelial Cells Fibroblasts intermediate Macrophages
## normal 9616 12 1911 971 5028
## tumor 31 2 945 830 176
##
## numbat_classify Monocyte NK Cells Plasma Cells T cells Tumor cells unassigned
## normal 1549 1084 203 28437 2171 230
## tumor 139 7 49 166 114848 194
agree_prop <- sum(numbat_classify == "tumor" & sce_melanoma$scClassify_celltype == "Tumor cells")/sum(sce_melanoma$scClassify_celltype == "Tumor cells")
print(agree_prop)
## [1] 0.9814475
g1 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = scClassify_celltype %in% "Tumor cells")) +
geom_scattermore() +
scale_color_viridis_d() +
theme(aspect.ratio = 1) +
labs(color = "", title = "Tumor in scClassify")
g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = numbat_classify %in% "tumor")) +
geom_scattermore() +
scale_color_viridis_d() +
theme(aspect.ratio = 1) +
labs(color = "", title = "Tumor in Numbat")
g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2,
color = numbat_classify %in% "tumor" &
scClassify_celltype %in% "Tumor cells")) +
geom_scattermore() +
scale_color_viridis_d() +
theme(aspect.ratio = 1) +
labs(color = "", title = "Tumor in both")
ggarrange(g1, g2, g3, ncol = 3, nrow = 1, align = "hv")
ggsave(file.path(figures_dir, "umap_numbat.pdf"), width = 15, height = 6)
df_toPlot$numbat_classify <- numbat_classify
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "numbat_classify", "scClassify_celltype")],
file = file.path(sourcedata_dir, "umap_numbat.rds"))
tab <- table(numbat_classify == "tumor", sce_melanoma$scClassify_celltype == "Tumor cells")
print(tab)
##
## FALSE TRUE
## FALSE 49041 2171
## TRUE 2539 114848
tab <- melt(tab)
colnames(tab) <- c("Numbat", "scClassify", "Count")
ggplot(tab, aes(x = Numbat, y = factor(scClassify, levels = c(TRUE, FALSE)), fill = Count)) +
geom_tile() +
geom_text(aes(label = Count)) +
scale_fill_viridis_c() +
theme_minimal() +
theme(aspect.ratio = 1) +
ylab("scClassify")
ggsave(file.path(figures_dir, "confusion_numbat_scClassify.rds.pdf"), width = 4, height = 3)
saveRDS(tab,
file = file.path(sourcedata_dir, "confusion_numbat_scClassify.rds"))
mclust::adjustedRandIndex(numbat_classify == "tumor",
sce_melanoma$scClassify_celltype == "Tumor cells")
## [1] 0.888796
saveRDS(sce_melanoma, file = file.path(sourcedata_dir, "sce_melanoma.rds"))
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Sydney
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] cellyx_0.1.0 cowplot_1.1.1
## [3] openxlsx_4.2.5.2 BiocNeighbors_1.18.0
## [5] BiocSingular_1.16.0 BiocParallel_1.34.2
## [7] ggrepel_0.9.3 ggalluvial_0.12.5
## [9] Rtsne_0.16 rcartocolor_2.1.1
## [11] ggridges_0.5.4 scran_1.28.2
## [13] scater_1.28.0 scuttle_1.10.2
## [15] scattermore_1.2 UpSetR_1.4.0
## [17] RColorBrewer_1.1-3 gridExtra_2.3
## [19] reshape2_1.4.4 pheatmap_1.0.12
## [21] moon_0.1.0 ggpubr_0.6.0
## [23] ggthemes_4.2.4 ggplot2_3.4.3
## [25] dplyr_1.1.2 SingleCellExperiment_1.22.0
## [27] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [29] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
## [31] IRanges_2.34.1 S4Vectors_0.38.1
## [33] BiocGenerics_0.46.0 MatrixGenerics_1.12.3
## [35] matrixStats_1.0.0
##
## loaded via a namespace (and not attached):
## [1] spatstat.sparse_3.0-2 bitops_1.0-7
## [3] httr_1.4.6 docopt_0.7.1
## [5] numDeriv_2016.8-1.1 tools_4.4.0
## [7] sctransform_0.3.5 backports_1.4.1
## [9] utf8_1.2.3 R6_2.5.1
## [11] lazyeval_0.2.2 uwot_0.1.16
## [13] withr_2.5.0 sp_2.0-0
## [15] progressr_0.14.0 cli_3.6.1
## [17] textshaping_0.3.6 spatstat.explore_3.2-1
## [19] labeling_0.4.2 slam_0.1-50
## [21] sass_0.4.9 Seurat_4.3.0.1
## [23] spatstat.data_3.0-1 pbapply_1.7-2
## [25] systemfonts_1.0.4 parallelly_1.36.0
## [27] limma_3.56.2 readxl_1.4.3
## [29] rstudioapi_0.15.0 generics_0.1.3
## [31] ica_1.0-3 spatstat.random_3.1-5
## [33] car_3.1-2 zip_2.3.0
## [35] DoubletFinder_2.0.3 Matrix_1.6-0
## [37] ggbeeswarm_0.7.2 fansi_1.0.4
## [39] abind_1.4-5 lifecycle_1.0.3
## [41] yaml_2.3.7 edgeR_3.42.4
## [43] carData_3.0-5 SparseArray_1.4.8
## [45] grid_4.4.0 promises_1.2.0.1
## [47] dqrng_0.3.0 crayon_1.5.2
## [49] miniUI_0.1.1.1 lattice_0.22-6
## [51] beachmat_2.16.0 pillar_1.9.0
## [53] knitr_1.43 metapod_1.8.0
## [55] boot_1.3-30 future.apply_1.11.0
## [57] codetools_0.2-20 leiden_0.4.3
## [59] glue_1.6.2 data.table_1.14.8
## [61] vctrs_0.6.3 png_0.1-8
## [63] cellranger_1.1.0 gtable_0.3.3
## [65] cachem_1.0.8 xfun_0.39
## [67] S4Arrays_1.4.1 mime_0.12
## [69] qlcMatrix_0.9.7 survival_3.5-3
## [71] statmod_1.5.0 bluster_1.10.0
## [73] ellipsis_0.3.2 fitdistrplus_1.1-11
## [75] ROCR_1.0-11 nlme_3.1-164
## [77] RcppAnnoy_0.0.21 bslib_0.7.0
## [79] irlba_2.3.5.1 vipor_0.4.5
## [81] KernSmooth_2.23-22 colorspace_2.1-0
## [83] tidyselect_1.2.0 compiler_4.4.0
## [85] DelayedArray_0.26.7 plotly_4.10.2
## [87] scales_1.2.1 lmtest_0.9-40
## [89] stringr_1.5.0 digest_0.6.33
## [91] goftest_1.2-3 spatstat.utils_3.0-3
## [93] minqa_1.2.5 sparsesvd_0.2-2
## [95] rmarkdown_2.23 XVector_0.40.0
## [97] htmltools_0.5.8.1 pkgconfig_2.0.3
## [99] lme4_1.1-34 sparseMatrixStats_1.12.2
## [101] highr_0.10 fastmap_1.1.1
## [103] rlang_1.1.1 htmlwidgets_1.6.2
## [105] shiny_1.7.5 DelayedMatrixStats_1.22.1
## [107] farver_2.1.1 jquerylib_0.1.4
## [109] zoo_1.8-12 jsonlite_1.8.7
## [111] mclust_6.0.0 RCurl_1.98-1.12
## [113] magrittr_2.0.3 GenomeInfoDbData_1.2.10
## [115] patchwork_1.1.2 munsell_0.5.0
## [117] Rcpp_1.0.11 viridis_0.6.4
## [119] reticulate_1.30 stringi_1.7.12
## [121] zlibbioc_1.46.0 MASS_7.3-60.0.1
## [123] plyr_1.8.8 parallel_4.4.0
## [125] listenv_0.9.0 deldir_1.0-9
## [127] splines_4.4.0 tensor_1.5
## [129] locfit_1.5-9.8 igraph_1.5.0.1
## [131] spatstat.geom_3.2-4 ggsignif_0.6.4
## [133] ScaledMatrix_1.8.1 evaluate_0.21
## [135] SeuratObject_4.1.3 nloptr_2.0.3
## [137] httpuv_1.6.11 RANN_2.6.1
## [139] tidyr_1.3.0 purrr_1.0.1
## [141] polyclip_1.10-4 future_1.33.0
## [143] rsvd_1.0.5 broom_1.0.5
## [145] xtable_1.8-4 rstatix_0.7.2
## [147] later_1.3.1 viridisLite_0.4.2
## [149] ragg_1.2.5 tibble_3.2.1
## [151] lmerTest_3.1-3 beeswarm_0.4.0
## [153] cluster_2.1.6 globals_0.16.2